library(deSolve)
library(rnaturalearth)
library(rnaturalearthdata)
library(sf)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(ggspatial)
library(ggplot2)
library(gganimate)
library(gifski)
library(plotly)
library(hrbrthemes)
library(gridExtra)
library(magick)

Introduction

Harmful Algal Blooms are sudden growths of aquatic, photosynthetic organisms deleterious to their environment. Blooms can release toxins, cause eutrophication, and prevent sunlight from reaching below the surface. Multiple reports in the past decades have connected these blooms with anthropogenic climate change, suggesting that human impact on our planet will lead to increased frequency and intensity of blooms. This report visualizes the temporal trends in blooms and attempts to model their growth. Data were taken from the Harmful Algal Event Database (HAEDAT), a resource managed by UNESCO. Visualizations were created using the ggplot2 and sf packages, while animation and interactive were created using the gganimate and plotly packages.

# read data and create df for use and remove years with incomplete data
hab <- read.csv("hab_world.csv") %>%
  rename(Year = eventYear)%>%
  filter(Year>1989) %>%
  filter(Year<2021)
#create crs for shapefile data
crs_use <- "+proj=laea +lat_0=30 +lon_0=-95"
 #remove data without sf data to map
hab_sub <- hab %>% drop_na(latitude)%>%
  drop_na(longitude)
#turn hab dataframe into sf format
hab_sf <- st_as_sf(x = hab_sub,                         
                   coords = c("latitude", "longitude"),
                   crs = crs_use)

# create a sf dataframe of the world
world <- ne_countries(scale = "medium", returnclass = "sf") 
# create map of hab events overlayed on world map by year
map <- ggplot(data = hab_sub) +
  geom_point(aes(x = longitude, y = latitude),
             color = "#69b3a2",
             size = 2.5) +
  geom_sf(data = world) + 
    annotation_north_arrow(location = "bl", which_north = "true", 
        pad_x = unit(0.75, "in"), pad_y = unit(0.5, "cm"),
        style = north_arrow_fancy_orienteering) + 
        labs(title = "Harmful Algal Blooms in {frame_time}") + 
        transition_time(Year) + 
        theme(axis.title.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank(),
          axis.title.y=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.background = element_rect(color = "white",
                                          fill = "white")) + 
    ggtitle('Harmful Algal Blooms in {frame_time}')

#create map with cumulative data
map_with_shadow <- map +
  shadow_mark() +
  ggtitle('Cumulative Harmful Algal Blooms by {frame_time}')


#create year values
num_years <- max(hab_sub$Year) - min(hab_sub$Year) + 1

#create animation 
hab_an <- animate(map, nframes = num_years, fps = 2, end_pause = TRUE)

#create cumulative animation
hab_an_cum <- animate(map_with_shadow, nframes = num_years, fps = 2, end_pause = TRUE)

a_mgif <- image_read(hab_an)
b_mgif <- image_read(hab_an_cum)

new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for(i in 1:30){
  combined <- image_append(c(a_mgif[i], b_mgif[i]))
 new_gif <- c(new_gif, combined)
}

new_gif

Figure 1. The location of all Harmful Algal Blooms in green observed each year from 1990-2020. In the left facet data is displayed year by year, while in the right facet datapoints are retained year by year, showing cumulative blooms from 1990.

Methods and Results

After visualizing HAB events, their trend over time was explored. A least squared regression line showed that there was a strong relationship between time and the number of blooms (p=3.458e-05). However, the data had a clear non-linear aspect to it. Using the deSolve package, logistic growth was used in an attempt to more accurately model HAB events over time. After graphing the model (Fig 2), its accuracy was tested by plotting the modeled data against observed values to show the difference between expected and observed numbers of blooms. Model validity was tested by comparing sum of squares: the linear model had a value of 39,5878.9, while the exponential model came out to 28,1618.6. This matched the eye test and confirmed that the growth in blooms could was best described using a logistic model.

# count number of HAB/year
hab_year <- hab %>% group_by(Year) %>%
  count(Year) %>%
  rename(HABfrequency = n)
#find linear fit
lm_fit <- lm(HABfrequency ~ Year, data=hab_year)
# summary(lm_fit)
#add simulated linear data to dataset
hab_year_sim <- hab_year %>%
  mutate("simulated" = Year*14.486- 28665.010)
#logistic growth function
logistic_growth <- function(t, N, p) {
  with (as.list(p), {
    dNdt <- rate*N*(1-N/K)
    return(list(dNdt))
  })
}

# definding time variables
T_init = 1990
T_lim = 2020
t = seq(T_init,T_lim,1)

# initial population
N_init <- c(N = 40)

#logistic parameters
estimatedRate <- 0.325
estimatedK <- 500

# parameters of the model
pars_logistic <- c (
  rate = estimatedRate, # rate at which the population changes
  K = estimatedK ) # limiting capacity

# solve ode and produce simulated data
N_t <- ode(y = N_init, times = t, parms = pars_logistic, func = logistic_growth)

# convert simulated data into tibble
df_sim_logistic <- as_tibble(N_t) %>% 
  mutate(Year = as.numeric(time),
         N_logistic = as.numeric(N)) %>%
  select(-time,-N)

# join the data and the simulated data
year_log <- hab_year %>%
  left_join(df_sim_logistic, by = c("Year" = "Year"))
# plot hab events per year and fit a linear regression line
p <- ggplot(data=hab_year_sim, aes(x=Year, y=HABfrequency)) +
  geom_point(color='#69b3a2') +
  geom_line(color='#69b3a2') +
  geom_point(aes(x=Year, y=simulated), color = 'red') +
  geom_ribbon(aes(x=Year, ymin=simulated, ymax=HABfrequency),
                fill="#69b3a2", alpha = 0.5) +
  geom_line(aes(x=Year, y=simulated), color = 'red') +
    ylab("HAB's") +
    theme_light() +
    theme(legend.position = "none")

p1 <- ggplotly(p)

 #calculate sum of squares for linear and logistic models
# sum((df_sim_logistic$N_logistic - hab_year$HABfrequency)^2)
# sum((hab_year_sim$simulated - hab_year$HABfrequency)^2)

# the observed prevalences:
p_dif <- ggplot(data=year_log) +
    geom_point(aes(x=Year, y=HABfrequency, color='Observed')) +
    geom_line(aes(x=Year, y=HABfrequency, color='Observed')) +
    geom_point(aes(x=Year, y=N_logistic, color = 'Predicted')) +
    geom_ribbon(aes(x=Year, ymin=N_logistic, ymax=HABfrequency),
                fill="#69b3a2", alpha = 0.5) +
    geom_line(aes(x=Year, y=N_logistic, color = 'Predicted')) +
    scale_color_manual(name='Harmful Algal Blooms',
                       breaks=c('Observed', 'Predicted'),
                       values=c('Observed'='#69b3a2',
                                'Predicted'='red')) +
    ylab("HAB's") +
    theme_light()
p2 <- ggplotly(p_dif)

#print plots
subplot(p1, p2, shareX = TRUE, shareY = TRUE, nrows = 2)

Figure 2. Frequency of harmful algal blooms reported by year. The top facet shows a Least square regression linear line fit to the data in red: y = -30304.087 + 15.297years (P = 3.458e-05, R squared = 0.4219). The bottom facet shows a logistic model fit to the data in red: dY/dX = 0.325 * Y * ((Y-500)/500)

hab_seatox <- hab %>% 
  select(Year, seafoodToxin) %>% #select variables of interest
  group_by(Year) %>%
  count(seafoodToxin) %>% #make variable n: number of toxic events/year
  pivot_wider(names_from = seafoodToxin, values_from = n) %>% #make 0 and 1 columns with n as the value
  rename("seatox_true" = "1","seatox_false"="0") %>% #rename columns
  mutate(seatox_true = replace_na(seatox_true, 0)) %>%
  mutate(seatox_total = seatox_true + seatox_false, seatox_percent = seatox_true/seatox_total) #create total event and percent true event columns

seatox_fit <- lm(seatox_percent ~ Year, data=hab_seatox) #find linear fit
# summary(seatox_fit) #find the goodness of the fit

As blooms increase in frequency, characteristics of the blooms are changing that make them more impactful to aquatic environments. An issue of concern around blooms is their impact on fisheries, as blooms can poison important aquatic resources. The percentage of blooms which are toxic to seafood has been on the rise (Fig 3). Not only are the number of blooms increasing, but a greater percentage of them are damaging the fishing industry and causing health concerns for consumers of seafood.

p_tox <- ggplot(data=hab_seatox, aes(x=Year)) +
  geom_point(aes(y=seatox_total, color = 'total')) +
  geom_area(aes(y=seatox_total), fill = '#69b3a2', alpha=0.5) +
  geom_line(aes(y=seatox_total, color = 'total')) +
  geom_point(aes(y=seatox_true, color = 'seafood toxin present')) +
  geom_area(aes(y=seatox_true), fill='#FFA500', alpha=0.5) +
  geom_line(aes(y=seatox_true, color = 'seafood toxin present')) +
  scale_color_manual(name='Harmful Algal Blooms',
                     breaks=c('total', 'seafood toxin present'),
                     values=c('total'='#69b3a2', 'seafood toxin present'='#FFA500')) +
  theme_light() + 
  ylab("Harmful Algal Blooms") +
  theme(axis.title.y = element_text(size = 7.5))

p_tox1 <- ggplotly(p_tox)

p_tox_perc <- ggplot(data=hab_seatox, aes(x=Year, y=seatox_percent)) +
  geom_area(fill="black", alpha=0.5) +
  geom_line(aes(color = 'Observed')) +
  geom_point(aes(color = 'Observed')) +
  labs(y = "Percent Seafood Toxic") +
  geom_smooth(method='lm',aes(x=Year,y=seatox_percent, color = 'Predicted'), se=FALSE) +
  scale_color_manual(name='Figure 3',
                     breaks=c('Observed', 'Predicted'),
                     values=c('Observed'='black',
                              'Predicted'='red')) +
  theme_light() +
  theme(axis.title.y = element_text(size = 7.5))

p_tox_perc1 <- ggplotly(p_tox_perc)

subplot(p_tox1, p_tox_perc1, nrows = 2, shareX = TRUE, titleY = TRUE)

Figure 3. The higher plot shows the number of seafood toxic blooms each year in orange compared to total number of blooms in blue. The bottom plot shows percentages of total blooms which are seafood toxic over time in black, with a least square regression linear line fit to the data in red: y = 0.01357X - 26.51043

#change seatox presence into a categorical variable
hab_sub_seatox <- hab_sub %>% 
  mutate(SeafoodToxin = as.character(seafoodToxin))

#create map
seatox_map <- ggplot(data = hab_sub_seatox) +
  geom_point(aes(x = longitude, y = latitude, color = SeafoodToxin), size = 2.5) +
  geom_sf(data = world) + 
    annotation_north_arrow(location = "bl", which_north = "true", 
      pad_x = unit(0.75, "in"), pad_y = unit(0.5, "cm"),
      style = north_arrow_fancy_orienteering) +
    labs(title = "Harmful Algal Blooms in {frame_time}") +
    transition_time(Year) +
      theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.background = element_rect(color = "white",
                                        fill = "white"),
        legend.position = c(0.1, 0.35)) +
    ggtitle('Harmful Algal Blooms in {frame_time}')

#create map with cumulative data
seatox_map_with_shadow <- seatox_map +
  shadow_mark() +
  ggtitle('Cumulative Harmful Algal Blooms by {frame_time}')


#animate map
seatox_map_an <- animate(seatox_map, nframes = num_years, fps = 2, end_pause = TRUE)

#create cumulative animation
seatox_map_an_cum <- animate(seatox_map_with_shadow, nframes = num_years, fps = 2, end_pause = TRUE)

#print maps
a2_mgif <- image_read(seatox_map_an)
b2_mgif <- image_read(seatox_map_an_cum)

new_gif2 <- image_append(c(a2_mgif[1], b2_mgif[1]))
for(i in 1:30){
  combined2 <- image_append(c(a2_mgif[i], b2_mgif[i]))
 new_gif2 <- c(new_gif2, combined2)
}

new_gif2

Figure 4. The location of all Harmful Algal Blooms in green observed each year from 1990-2020. In the left facet data is displayed year by year, while in the right facet datapoints are retained year by year, showing cumulative blooms from 1990. Blooms which created seafood toxicity are shown in orange.

Conclusions

Harmful Algal Blooms are showing a clear rise in frequency over the past decades. Understanding why this increase is occurring, what impacts it may have, and what future growth may look like will be important in the preservation of our marine resources. Not only are the number of blooms increasing, but due to their changing algal compositions, greater proportions of the blooms are damaging to marine life. These two clear trends show the urgency of the issue. blooms are estimated to cause around 20 million dollars in losses for US fisheries and around 50 million dollars in total damages when costs of public health and tourism are included (WoodsHole).To further understanding of global harmful bloom trends a next step for this project would be investigating explanatory factors of the trends observed in this report. Increasing concentrations of atmospheric and oceanic carbon dioxide, nutrient abundance from industrial processes, and increasing global temperatures have all been proposed as possible explanations (Climate) Examining these trends and their correlations with blooms would help understand why blooms are occurring more frequently and becoming increasingly toxic.

Bibliography

“Climate: Blooms like it hot”, Paerl, Hans W., Huisman, Jef, Institute of Marine Sciences, UNC 2008

“Climate Change and Harmful Algal Blooms”, https://www.epa.gov/nutrientpollution/climate-change-and-harmful-algal-blooms, EPA 2021

“Estimated Annual Economic Impacts from Harmful Algal Blooms (HABs) in the United States”, Donald M. Anderson, Porter Hoagland, Woods Hole Oceanographic Institute 2000